Report on the mining tasks

Univariate data exploration

# Load the bank data set. read.csv2() expects ';' as the field separator.
# stringsAsFactors = TRUE is passed explicitly: since R 4.0 the default is
# FALSE, which would load the text columns as character instead of the factors
# shown in the str() output recorded in this report.
bank <- read.csv2(
  "~/STUDIES/TBS Courses/DM course/raw data/bank.csv",
  stringsAsFactors = TRUE
)
# Preview the first rows of the loaded data frame
head(bank)
##   age         job marital education default balance housing loan  contact
## 1  30  unemployed married   primary      no    1787      no   no cellular
## 2  33    services married secondary      no    4789     yes  yes cellular
## 3  35  management  single  tertiary      no    1350     yes   no cellular
## 4  30  management married  tertiary      no    1476     yes  yes  unknown
## 5  59 blue-collar married secondary      no       0     yes   no  unknown
## 6  35  management  single  tertiary      no     747      no   no cellular
##   day month duration campaign pdays previous poutcome  y
## 1  19   oct       79        1    -1        0  unknown no
## 2  11   may      220        1   339        4  failure no
## 3  16   apr      185        1   330        1  failure no
## 4   3   jun      199        4    -1        0  unknown no
## 5   5   may      226        1    -1        0  unknown no
## 6  23   feb      141        2   176        3  failure no
# Inspect the structure of the data set: its dimensions and, for each column,
# the storage type and the first few values
str(bank)
## 'data.frame':    4521 obs. of  17 variables:
##  $ age      : int  30 33 35 30 59 35 36 39 41 43 ...
##  $ job      : Factor w/ 12 levels "admin.","blue-collar",..: 11 8 5 5 2 5 7 10 3 8 ...
##  $ marital  : Factor w/ 3 levels "divorced","married",..: 2 2 3 2 2 3 2 2 2 2 ...
##  $ education: Factor w/ 4 levels "primary","secondary",..: 1 2 3 3 2 3 3 2 3 1 ...
##  $ default  : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ balance  : int  1787 4789 1350 1476 0 747 307 147 221 -88 ...
##  $ housing  : Factor w/ 2 levels "no","yes": 1 2 2 2 2 1 2 2 2 2 ...
##  $ loan     : Factor w/ 2 levels "no","yes": 1 2 1 2 1 1 1 1 1 2 ...
##  $ contact  : Factor w/ 3 levels "cellular","telephone",..: 1 1 1 3 3 1 1 1 3 1 ...
##  $ day      : int  19 11 16 3 5 23 14 6 14 17 ...
##  $ month    : Factor w/ 12 levels "apr","aug","dec",..: 11 9 1 7 9 4 9 9 9 1 ...
##  $ duration : int  79 220 185 199 226 141 341 151 57 313 ...
##  $ campaign : int  1 1 1 4 1 2 1 2 2 1 ...
##  $ pdays    : int  -1 339 330 -1 -1 176 330 -1 -1 147 ...
##  $ previous : int  0 4 1 0 0 3 2 0 0 2 ...
##  $ poutcome : Factor w/ 4 levels "failure","other",..: 4 1 1 4 4 1 2 4 4 1 ...
##  $ y        : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
# Split the columns of bank into three data frames by variable type.

# Numerical variables
banknum <- bank[
  c("age", "balance", "day", "duration", "campaign", "pdays", "previous")
]

# Multinomial (more than two levels) categorical variables
bankcat <- bank[
  c("job", "marital", "education", "contact", "month", "poutcome")
]

# Binary (no/yes) variables
bankbin <- bank[c("default", "housing", "loan", "y")]

We will study each group of variables in turn, starting with the banknum set. We will use the Pearson correlation to determine the relationships between its variables. For descriptive statistics we use the describe function from the psych package, as it gives better insight into the normality of the variables and into anomalies.

#Upload the necessary packages
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(Hmisc))
suppressPackageStartupMessages(library(psych))
suppressPackageStartupMessages(library(corrplot))
suppressPackageStartupMessages(library(ggpubr))
suppressPackageStartupMessages(library(gridExtra))
suppressPackageStartupMessages(library(purrr))

Numerical variables exploration

We will also use the multi.hist function to produce multiple histograms overlaid with density plots, and we will run an individual normality test on each variable using the shapiro.test function.

# Descriptive statistics for every numerical variable. psych::describe() also
# reports skew and kurtosis, which help to judge normality.
# Note: banknum is deliberately NOT attach()ed; every call below names its data
# explicitly so nothing depends on (or pollutes) the search path.
psych::describe(banknum)
##          vars    n    mean      sd median trimmed    mad   min   max range
## age         1 4521   41.17   10.58     39   40.48  10.38    19    87    68
## balance     2 4521 1422.66 3009.64    444  802.41 658.27 -3313 71188 74501
## day         3 4521   15.92    8.25     16   15.80  10.38     1    31    30
## duration    4 4521  263.96  259.86    185  216.44 143.81     4  3025  3021
## campaign    5 4521    2.79    3.11      2    2.14   1.48     1    50    49
## pdays       6 4521   39.77  100.12     -1   11.56   0.00    -1   871   872
## previous    7 4521    0.54    1.69      0    0.12   0.00     0    25    25
##          skew kurtosis    se
## age      0.70     0.35  0.16
## balance  6.59    88.25 44.76
## day      0.09    -1.04  0.12
## duration 2.77    12.51  3.86
## campaign 4.74    37.11  0.05
## pdays    2.72     7.94  1.49
## previous 5.87    51.91  0.03
# Pairwise Pearson correlations together with their p-values (Hmisc::rcorr)
rcorr(as.matrix(banknum), type = "pearson")
##            age balance   day duration campaign pdays previous
## age       1.00    0.08 -0.02     0.00    -0.01 -0.01     0.00
## balance   0.08    1.00 -0.01    -0.02    -0.01  0.01     0.03
## day      -0.02   -0.01  1.00    -0.02     0.16 -0.09    -0.06
## duration  0.00   -0.02 -0.02     1.00    -0.07  0.01     0.02
## campaign -0.01   -0.01  0.16    -0.07     1.00 -0.09    -0.07
## pdays    -0.01    0.01 -0.09     0.01    -0.09  1.00     0.58
## previous  0.00    0.03 -0.06     0.02    -0.07  0.58     1.00
## 
## n= 4521 
## 
## 
## P
##          age    balance day    duration campaign pdays  previous
## age             0.0000  0.2301 0.8736   0.7293   0.5500 0.8134  
## balance  0.0000         0.5597 0.2836   0.5025   0.5259 0.0782  
## day      0.2301 0.5597         0.0978   0.0000   0.0000 0.0000  
## duration 0.8736 0.2836  0.0978          0.0000   0.4853 0.2242  
## campaign 0.7293 0.5025  0.0000 0.0000            0.0000 0.0000  
## pdays    0.5500 0.5259  0.0000 0.4853   0.0000          0.0000  
## previous 0.8134 0.0782  0.0000 0.2242   0.0000   0.0000
# The correlations are not visualized: apart from pdays-previous (r = 0.58)
# they are all weak (|r| <= 0.16), even where the p-value is small.
# TODO: preferably redraw the histograms below with ggplot2.
multi.hist(banknum, bcol = "orange", dcol = "blue")

# Shapiro-Wilk normality test for each numerical variable. with() looks the
# column up inside banknum, so the printed data name stays the bare column name.
with(banknum, shapiro.test(age))
## 
##  Shapiro-Wilk normality test
## 
## data:  age
## W = 0.95951, p-value < 2.2e-16
with(banknum, shapiro.test(balance))
## 
##  Shapiro-Wilk normality test
## 
## data:  balance
## W = 0.50151, p-value < 2.2e-16
with(banknum, shapiro.test(day))
## 
##  Shapiro-Wilk normality test
## 
## data:  day
## W = 0.96072, p-value < 2.2e-16
with(banknum, shapiro.test(duration))
## 
##  Shapiro-Wilk normality test
## 
## data:  duration
## W = 0.74754, p-value < 2.2e-16
with(banknum, shapiro.test(campaign))
## 
##  Shapiro-Wilk normality test
## 
## data:  campaign
## W = 0.56082, p-value < 2.2e-16
with(banknum, shapiro.test(pdays))
## 
##  Shapiro-Wilk normality test
## 
## data:  pdays
## W = 0.47041, p-value < 2.2e-16
with(banknum, shapiro.test(previous))
## 
##  Shapiro-Wilk normality test
## 
## data:  previous
## W = 0.35998, p-value < 2.2e-16

Categorical variables exploration

Now we proceed to analyze the categorical variables in our data set. We will visualize them with bar plots, and then we will conduct association tests.

# Frequency counts of the levels of each multinomial variable
summary(bankcat)
##           job          marital         education         contact    
##  management :969   divorced: 528   primary  : 678   cellular :2896  
##  blue-collar:946   married :2797   secondary:2306   telephone: 301  
##  technician :768   single  :1196   tertiary :1350   unknown  :1324  
##  admin.     :478                   unknown  : 187                   
##  services   :417                                                    
##  retired    :230                                                    
##  (Other)    :713                                                    
##      month         poutcome   
##  may    :1398   failure: 490  
##  jul    : 706   other  : 197  
##  aug    : 633   success: 129  
##  jun    : 531   unknown:3705  
##  nov    : 389                 
##  apr    : 293                 
##  (Other): 571
# One bar chart of level counts per multinomial variable. The x-axis labels are
# tilted by 45 degrees so that long level names do not overlap.
p1 <- ggplot(bankcat, aes(x = job)) +
  theme(axis.text.x = element_text(hjust = 1, angle = 45)) +
  geom_bar(colour = "black", fill = "orange")
p2 <- ggplot(bankcat, aes(x = marital)) +
  theme(axis.text.x = element_text(hjust = 1, angle = 45)) +
  geom_bar(colour = "black", fill = "orange")
p3 <- ggplot(bankcat, aes(x = education)) +
  theme(axis.text.x = element_text(hjust = 1, angle = 45)) +
  geom_bar(colour = "black", fill = "orange")
p4 <- ggplot(bankcat, aes(x = contact)) +
  theme(axis.text.x = element_text(hjust = 1, angle = 45)) +
  geom_bar(colour = "black", fill = "orange")
p5 <- ggplot(bankcat, aes(x = month)) +
  theme(axis.text.x = element_text(hjust = 1, angle = 45)) +
  geom_bar(colour = "black", fill = "orange")
p6 <- ggplot(bankcat, aes(x = poutcome)) +
  theme(axis.text.x = element_text(hjust = 1, angle = 45)) +
  geom_bar(colour = "black", fill = "orange")

# Show each chart on its own ...
p1
p2
p3
p4
p5
p6

# ... and then all six together in a single grid
grid.arrange(
  p1, p2, p3, p4, p5, p6,
  top = "Plot matrix of the categorical variables in the bank data set"
)

Binary variables explorations

Now that the categorical and numerical variables have been visualized, it is time to proceed to the binary data. In this section we draw a simple bar plot of the categories of each binary variable.

# Frequency counts of the no/yes levels of each binary variable
summary(bankbin)
##  default    housing     loan        y       
##  no :4445   no :1962   no :3830   no :4000  
##  yes:  76   yes:2559   yes: 691   yes: 521
# One bar chart of the no/yes counts per binary variable
p11 <- ggplot(bankbin, aes(x = default)) +
  geom_bar(colour = "black", fill = "orange")
p12 <- ggplot(bankbin, aes(x = housing)) +
  geom_bar(colour = "black", fill = "orange")
p13 <- ggplot(bankbin, aes(x = loan)) +
  geom_bar(colour = "black", fill = "orange")
p14 <- ggplot(bankbin, aes(x = y)) +
  geom_bar(colour = "black", fill = "orange")

# Arrange the four charts in a single grid
grid.arrange(
  p11, p12, p13, p14,
  top = "Plot matrix of binary data of the bank dataset"
)

Bivariate Exploration

In this section we will explore the associations within each variable type, and then between the variable types.

Categorical

In this subsection we will construct the contingency tables for every pair of categorical variables, and then we will use these tables to conduct the chi-squared test.

# Contingency tables for all 15 pairs of multinomial variables.
# with() evaluates table() inside bankcat, so the columns are found without
# attach()ing the data frame to the search path, and the table dimensions are
# still labelled with the variable names.
t1 <- with(bankcat, table(job, marital))
t2 <- with(bankcat, table(job, education))
t3 <- with(bankcat, table(job, contact))
t4 <- with(bankcat, table(job, month))
t5 <- with(bankcat, table(job, poutcome))
t6 <- with(bankcat, table(marital, education))
t7 <- with(bankcat, table(marital, contact))
t8 <- with(bankcat, table(marital, month))
t9 <- with(bankcat, table(marital, poutcome))
t10 <- with(bankcat, table(education, contact))
t11 <- with(bankcat, table(education, month))
t12 <- with(bankcat, table(education, poutcome))
t13 <- with(bankcat, table(contact, month))
t14 <- with(bankcat, table(contact, poutcome))
t15 <- with(bankcat, table(month, poutcome))
tables <- list(
  t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15
)

# Pearson chi-squared test of independence on every table; results are
# returned in the same order as `tables`.
# NOTE(review): chisq.test() warns "Chi-squared approximation may be incorrect"
# for several of these tables; consider simulate.p.value = TRUE for them.
map(tables, chisq.test)
## Warning in .f(.x[[i]], ...): Chi-squared approximation may be incorrect

## Warning in .f(.x[[i]], ...): Chi-squared approximation may be incorrect

## Warning in .f(.x[[i]], ...): Chi-squared approximation may be incorrect

## Warning in .f(.x[[i]], ...): Chi-squared approximation may be incorrect

## Warning in .f(.x[[i]], ...): Chi-squared approximation may be incorrect

## Warning in .f(.x[[i]], ...): Chi-squared approximation may be incorrect

## Warning in .f(.x[[i]], ...): Chi-squared approximation may be incorrect

## Warning in .f(.x[[i]], ...): Chi-squared approximation may be incorrect

## Warning in .f(.x[[i]], ...): Chi-squared approximation may be incorrect
## [[1]]
## 
##  Pearson's Chi-squared test
## 
## data:  .x[[i]]
## X-squared = 373.18, df = 22, p-value < 2.2e-16
## 
## 
## [[2]]
## 
##  Pearson's Chi-squared test
## 
## data:  .x[[i]]
## X-squared = 2840, df = 33, p-value < 2.2e-16
## 
## 
## [[3]]
## 
##  Pearson's Chi-squared test
## 
## data:  .x[[i]]
## X-squared = 226.51, df = 22, p-value < 2.2e-16
## 
## 
## [[4]]
## 
##  Pearson's Chi-squared test
## 
## data:  .x[[i]]
## X-squared = 767.94, df = 121, p-value < 2.2e-16
## 
## 
## [[5]]
## 
##  Pearson's Chi-squared test
## 
## data:  .x[[i]]
## X-squared = 54.38, df = 33, p-value = 0.01097
## 
## 
## [[6]]
## 
##  Pearson's Chi-squared test
## 
## data:  .x[[i]]
## X-squared = 139.09, df = 6, p-value < 2.2e-16
## 
## 
## [[7]]
## 
##  Pearson's Chi-squared test
## 
## data:  .x[[i]]
## X-squared = 29.519, df = 4, p-value = 6.133e-06
## 
## 
## [[8]]
## 
##  Pearson's Chi-squared test
## 
## data:  .x[[i]]
## X-squared = 52.912, df = 22, p-value = 0.0002336
## 
## 
## [[9]]
## 
##  Pearson's Chi-squared test
## 
## data:  .x[[i]]
## X-squared = 6.9389, df = 6, p-value = 0.3265
## 
## 
## [[10]]
## 
##  Pearson's Chi-squared test
## 
## data:  .x[[i]]
## X-squared = 147.01, df = 6, p-value < 2.2e-16
## 
## 
## [[11]]
## 
##  Pearson's Chi-squared test
## 
## data:  .x[[i]]
## X-squared = 205.06, df = 33, p-value < 2.2e-16
## 
## 
## [[12]]
## 
##  Pearson's Chi-squared test
## 
## data:  .x[[i]]
## X-squared = 13.551, df = 9, p-value = 0.1392
## 
## 
## [[13]]
## 
##  Pearson's Chi-squared test
## 
## data:  .x[[i]]
## X-squared = 2401.1, df = 22, p-value < 2.2e-16
## 
## 
## [[14]]
## 
##  Pearson's Chi-squared test
## 
## data:  .x[[i]]
## X-squared = 377.92, df = 6, p-value < 2.2e-16
## 
## 
## [[15]]
## 
##  Pearson's Chi-squared test
## 
## data:  .x[[i]]
## X-squared = 693.97, df = 33, p-value < 2.2e-16

Binary

In this subsection we will explore the inter-binary association within the binary data. As usual, we construct the contingency tables, and then we conduct the chi2 test.

# Contingency tables for all 6 pairs of binary variables.
# with() evaluates table() inside bankbin, so no attach() is needed and the
# table dimensions keep the variable names.
t21 <- with(bankbin, table(default, housing))
t22 <- with(bankbin, table(default, loan))
t23 <- with(bankbin, table(default, y))
t24 <- with(bankbin, table(housing, loan))
t25 <- with(bankbin, table(housing, y))
t26 <- with(bankbin, table(loan, y))
tables1 <- list(t21, t22, t23, t24, t25, t26)

# Chi-squared test of independence on every 2x2 table; results are returned in
# the same order as `tables1`.
map(tables1, chisq.test)
## [[1]]
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  .x[[i]]
## X-squared = 0.11967, df = 1, p-value = 0.7294
## 
## 
## [[2]]
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  .x[[i]]
## X-squared = 17.157, df = 1, p-value = 3.441e-05
## 
## 
## [[3]]
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  .x[[i]]
## X-squared = 1.1844e-27, df = 1, p-value = 1
## 
## 
## [[4]]
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  .x[[i]]
## X-squared = 1.4374, df = 1, p-value = 0.2306
## 
## 
## [[5]]
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  .x[[i]]
## X-squared = 48.885, df = 1, p-value = 2.715e-12
## 
## 
## [[6]]
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  .x[[i]]
## X-squared = 21.872, df = 1, p-value = 2.915e-06

Binary vs numerical data

In this subsection we will explore the association between the binary variables, starting with “y”, and the numerical variables, mainly through box plot visualizations built with the ggplot2 package.

# Horizontal box plots of the numerical variables, one box per level of y.
# No attach(bank) is needed: every plot is given its data frame explicitly, and
# attaching bank would only mask the objects that are already on the search
# path.
a1 <- ggplot(bank, aes(x = y, y = age)) +
  geom_boxplot(fill = "orange", color = "black") +
  coord_flip()
a2 <- ggplot(bank, aes(x = y, y = balance)) +
  geom_boxplot(fill = "orange", color = "black") +
  coord_flip()
a3 <- ggplot(bank, aes(x = y, y = day)) +
  geom_boxplot(fill = "orange", color = "black") +
  coord_flip()
a4 <- ggplot(bank, aes(x = y, y = duration)) +
  geom_boxplot(fill = "orange", color = "black") +
  coord_flip()
a5 <- ggplot(bank, aes(x = y, y = campaign)) +
  geom_boxplot(fill = "orange", color = "black") +
  coord_flip()
a6 <- ggplot(bank, aes(x = y, y = pdays)) +
  geom_boxplot(fill = "orange", color = "black") +
  coord_flip()

# Show each plot on its own ...
a1
a2
a3
a4
a5
a6

# ... and then all six together in a single grid
grid.arrange(
  a1, a2, a3, a4, a5, a6,
  top = "Binary variables difference of groups regarding to 'y'"
)

“Housing” vs the rest of the numerical variables

# Horizontal box plots of the numerical variables, one box per level of
# housing.
# bank is not attach()ed here: every plot is given its data frame explicitly,
# and attaching it would only stack another copy on the search path and print
# "objects are masked" messages.
a11 <- ggplot(bank, aes(x = housing, y = age)) +
  geom_boxplot(fill = "orange", color = "black") +
  coord_flip()
a21 <- ggplot(bank, aes(x = housing, y = balance)) +
  geom_boxplot(fill = "orange", color = "black") +
  coord_flip()
a31 <- ggplot(bank, aes(x = housing, y = day)) +
  geom_boxplot(fill = "orange", color = "black") +
  coord_flip()
a41 <- ggplot(bank, aes(x = housing, y = duration)) +
  geom_boxplot(fill = "orange", color = "black") +
  coord_flip()
a51 <- ggplot(bank, aes(x = housing, y = campaign)) +
  geom_boxplot(fill = "orange", color = "black") +
  coord_flip()
a61 <- ggplot(bank, aes(x = housing, y = pdays)) +
  geom_boxplot(fill = "orange", color = "black") +
  coord_flip()

# Show each plot on its own ...
a11
a21
a31
a41
a51
a61

# ... and then all six together in a single grid
grid.arrange(
  a11, a21, a31, a41, a51, a61,
  top = "Numerical data and their propreties comparing to housing"
)

“Loan” vs the rest of the numerical variables

# Horizontal box plots of the numerical variables, one box per level of loan.
# bank is not attach()ed here: every plot is given its data frame explicitly,
# and attaching it would only stack another copy on the search path and print
# "objects are masked" messages.
a12 <- ggplot(bank, aes(x = loan, y = age)) +
  geom_boxplot(fill = "orange", color = "black") +
  coord_flip()
a22 <- ggplot(bank, aes(x = loan, y = balance)) +
  geom_boxplot(fill = "orange", color = "black") +
  coord_flip()
a32 <- ggplot(bank, aes(x = loan, y = day)) +
  geom_boxplot(fill = "orange", color = "black") +
  coord_flip()
a42 <- ggplot(bank, aes(x = loan, y = duration)) +
  geom_boxplot(fill = "orange", color = "black") +
  coord_flip()
a52 <- ggplot(bank, aes(x = loan, y = campaign)) +
  geom_boxplot(fill = "orange", color = "black") +
  coord_flip()
a62 <- ggplot(bank, aes(x = loan, y = pdays)) +
  geom_boxplot(fill = "orange", color = "black") +
  coord_flip()

# Show each plot on its own ...
a12
a22
a32
a42
a52
a62

# ... and then all six together in a single grid
grid.arrange(
  a12, a22, a32, a42, a52, a62,
  top = "numerical variables and their propreties comparing to 'loan'"
)

“default” vs the rest of the numerical variables

# Horizontal box plots of the numerical variables, one box per level of
# default.
# bank is not attach()ed here: every plot is given its data frame explicitly,
# and attaching it would only stack another copy on the search path and print
# "objects are masked" messages.
a13 <- ggplot(bank, aes(x = default, y = age)) +
  geom_boxplot(fill = "orange", color = "black") +
  coord_flip()
a23 <- ggplot(bank, aes(x = default, y = balance)) +
  geom_boxplot(fill = "orange", color = "black") +
  coord_flip()
a33 <- ggplot(bank, aes(x = default, y = day)) +
  geom_boxplot(fill = "orange", color = "black") +
  coord_flip()
a43 <- ggplot(bank, aes(x = default, y = duration)) +
  geom_boxplot(fill = "orange", color = "black") +
  coord_flip()
a53 <- ggplot(bank, aes(x = default, y = campaign)) +
  geom_boxplot(fill = "orange", color = "black") +
  coord_flip()
a63 <- ggplot(bank, aes(x = default, y = pdays)) +
  geom_boxplot(fill = "orange", color = "black") +
  coord_flip()

# Show each plot on its own ...
a13
a23
a33
a43
a53
a63

# ... and then all six together in a single grid
grid.arrange(
  a13, a23, a33, a43, a53, a63,
  top = "the variable default in comparision to numerical variables"
)

Numerical variables and the responses in “y”

In this subsection we will explore each numerical variable across the levels of every categorical variable, while distinguishing the observations according to the response in the variable “y”.

“age” across the categorical variables

# Box plots of age, filled by the response y, with one facet row per level of
# each multinomial variable. Facet strip labels are drawn horizontally.
# bank is not attach()ed here: every plot is given its data frame explicitly,
# and attaching it would only stack another copy on the search path and print
# "objects are masked" messages.
b11 <- ggplot(bank, aes(y = age)) +
  geom_boxplot(aes(fill = y)) +
  facet_grid(job ~ .) +
  coord_flip() +
  theme(strip.text.y = element_text(angle = 360, hjust = 1))
b21 <- ggplot(bank, aes(y = age)) +
  geom_boxplot(aes(fill = y)) +
  facet_grid(marital ~ .) +
  coord_flip() +
  theme(strip.text.y = element_text(angle = 360, hjust = 1))
b31 <- ggplot(bank, aes(y = age)) +
  geom_boxplot(aes(fill = y)) +
  facet_grid(education ~ .) +
  coord_flip() +
  theme(strip.text.y = element_text(angle = 360, hjust = 1))
b41 <- ggplot(bank, aes(y = age)) +
  geom_boxplot(aes(fill = y)) +
  facet_grid(contact ~ .) +
  coord_flip() +
  theme(strip.text.y = element_text(angle = 360, hjust = 1))
b51 <- ggplot(bank, aes(y = age)) +
  geom_boxplot(aes(fill = y)) +
  facet_grid(month ~ .) +
  coord_flip() +
  theme(strip.text.y = element_text(angle = 360, hjust = 1))
b61 <- ggplot(bank, aes(y = age)) +
  geom_boxplot(aes(fill = y)) +
  facet_grid(poutcome ~ .) +
  coord_flip() +
  theme(strip.text.y = element_text(angle = 360, hjust = 1))

# Show the six plots one after another
b11
b21
b31
b41
b51
b61

“balance” across the categorical variables

# Box plots of balance, filled by the response y, with one facet row per level
# of each multinomial variable. Facet strip labels are drawn horizontally.
b12 <- ggplot(bank, aes(y = balance)) +
  facet_grid(job ~ .) +
  geom_boxplot(aes(fill = y)) +
  theme(strip.text.y = element_text(hjust = 1, angle = 360)) +
  coord_flip()
b22 <- ggplot(bank, aes(y = balance)) +
  facet_grid(marital ~ .) +
  geom_boxplot(aes(fill = y)) +
  theme(strip.text.y = element_text(hjust = 1, angle = 360)) +
  coord_flip()
b32 <- ggplot(bank, aes(y = balance)) +
  facet_grid(education ~ .) +
  geom_boxplot(aes(fill = y)) +
  theme(strip.text.y = element_text(hjust = 1, angle = 360)) +
  coord_flip()
b42 <- ggplot(bank, aes(y = balance)) +
  facet_grid(contact ~ .) +
  geom_boxplot(aes(fill = y)) +
  theme(strip.text.y = element_text(hjust = 1, angle = 360)) +
  coord_flip()
b52 <- ggplot(bank, aes(y = balance)) +
  facet_grid(month ~ .) +
  geom_boxplot(aes(fill = y)) +
  theme(strip.text.y = element_text(hjust = 1, angle = 360)) +
  coord_flip()
b62 <- ggplot(bank, aes(y = balance)) +
  facet_grid(poutcome ~ .) +
  geom_boxplot(aes(fill = y)) +
  theme(strip.text.y = element_text(hjust = 1, angle = 360)) +
  coord_flip()

# Show the six plots one after another
b12
b22
b32
b42
b52
b62

“day” across the categorical variables

# Box plots of day, filled by the response y, with one facet row per level of
# each multinomial variable. Facet strip labels are drawn horizontally.
b13 <- ggplot(bank, aes(y = day)) +
  facet_grid(job ~ .) +
  geom_boxplot(aes(fill = y)) +
  theme(strip.text.y = element_text(hjust = 1, angle = 360)) +
  coord_flip()
b23 <- ggplot(bank, aes(y = day)) +
  facet_grid(marital ~ .) +
  geom_boxplot(aes(fill = y)) +
  theme(strip.text.y = element_text(hjust = 1, angle = 360)) +
  coord_flip()
b33 <- ggplot(bank, aes(y = day)) +
  facet_grid(education ~ .) +
  geom_boxplot(aes(fill = y)) +
  theme(strip.text.y = element_text(hjust = 1, angle = 360)) +
  coord_flip()
b43 <- ggplot(bank, aes(y = day)) +
  facet_grid(contact ~ .) +
  geom_boxplot(aes(fill = y)) +
  theme(strip.text.y = element_text(hjust = 1, angle = 360)) +
  coord_flip()
b53 <- ggplot(bank, aes(y = day)) +
  facet_grid(month ~ .) +
  geom_boxplot(aes(fill = y)) +
  theme(strip.text.y = element_text(hjust = 1, angle = 360)) +
  coord_flip()
b63 <- ggplot(bank, aes(y = day)) +
  facet_grid(poutcome ~ .) +
  geom_boxplot(aes(fill = y)) +
  theme(strip.text.y = element_text(hjust = 1, angle = 360)) +
  coord_flip()

# Show the six plots one after another
b13
b23
b33
b43
b53
b63

“duration” across the categorical variables

# Box plots of duration, filled by the response y, with one facet row per level
# of each multinomial variable. Facet strip labels are drawn horizontally.
b14 <- ggplot(bank, aes(y = duration)) +
  facet_grid(job ~ .) +
  geom_boxplot(aes(fill = y)) +
  theme(strip.text.y = element_text(hjust = 1, angle = 360)) +
  coord_flip()
b24 <- ggplot(bank, aes(y = duration)) +
  facet_grid(marital ~ .) +
  geom_boxplot(aes(fill = y)) +
  theme(strip.text.y = element_text(hjust = 1, angle = 360)) +
  coord_flip()
b34 <- ggplot(bank, aes(y = duration)) +
  facet_grid(education ~ .) +
  geom_boxplot(aes(fill = y)) +
  theme(strip.text.y = element_text(hjust = 1, angle = 360)) +
  coord_flip()
b44 <- ggplot(bank, aes(y = duration)) +
  facet_grid(contact ~ .) +
  geom_boxplot(aes(fill = y)) +
  theme(strip.text.y = element_text(hjust = 1, angle = 360)) +
  coord_flip()
b54 <- ggplot(bank, aes(y = duration)) +
  facet_grid(month ~ .) +
  geom_boxplot(aes(fill = y)) +
  theme(strip.text.y = element_text(hjust = 1, angle = 360)) +
  coord_flip()
b64 <- ggplot(bank, aes(y = duration)) +
  facet_grid(poutcome ~ .) +
  geom_boxplot(aes(fill = y)) +
  theme(strip.text.y = element_text(hjust = 1, angle = 360)) +
  coord_flip()

# Show the six plots one after another
b14
b24
b34
b44
b54
b64

“campaign” across the categorical variables

# Box plots of campaign, filled by the response y, with one facet row per level
# of each multinomial variable. Facet strip labels are drawn horizontally.
b15 <- ggplot(bank, aes(y = campaign)) +
  facet_grid(job ~ .) +
  geom_boxplot(aes(fill = y)) +
  theme(strip.text.y = element_text(hjust = 1, angle = 360)) +
  coord_flip()
b25 <- ggplot(bank, aes(y = campaign)) +
  facet_grid(marital ~ .) +
  geom_boxplot(aes(fill = y)) +
  theme(strip.text.y = element_text(hjust = 1, angle = 360)) +
  coord_flip()
b35 <- ggplot(bank, aes(y = campaign)) +
  facet_grid(education ~ .) +
  geom_boxplot(aes(fill = y)) +
  theme(strip.text.y = element_text(hjust = 1, angle = 360)) +
  coord_flip()
b45 <- ggplot(bank, aes(y = campaign)) +
  facet_grid(contact ~ .) +
  geom_boxplot(aes(fill = y)) +
  theme(strip.text.y = element_text(hjust = 1, angle = 360)) +
  coord_flip()
b55 <- ggplot(bank, aes(y = campaign)) +
  facet_grid(month ~ .) +
  geom_boxplot(aes(fill = y)) +
  theme(strip.text.y = element_text(hjust = 1, angle = 360)) +
  coord_flip()
b65 <- ggplot(bank, aes(y = campaign)) +
  facet_grid(poutcome ~ .) +
  geom_boxplot(aes(fill = y)) +
  theme(strip.text.y = element_text(hjust = 1, angle = 360)) +
  coord_flip()

# Show the six plots one after another
b15
b25
b35
b45
b55
b65

“pdays” across the categorical variables

# Box plots of pdays, filled by the response y, with one facet row per level of
# each multinomial variable. Facet strip labels are drawn horizontally.
b16 <- ggplot(bank, aes(y = pdays)) +
  facet_grid(job ~ .) +
  geom_boxplot(aes(fill = y)) +
  theme(strip.text.y = element_text(hjust = 1, angle = 360)) +
  coord_flip()
b26 <- ggplot(bank, aes(y = pdays)) +
  facet_grid(marital ~ .) +
  geom_boxplot(aes(fill = y)) +
  theme(strip.text.y = element_text(hjust = 1, angle = 360)) +
  coord_flip()
b36 <- ggplot(bank, aes(y = pdays)) +
  facet_grid(education ~ .) +
  geom_boxplot(aes(fill = y)) +
  theme(strip.text.y = element_text(hjust = 1, angle = 360)) +
  coord_flip()
b46 <- ggplot(bank, aes(y = pdays)) +
  facet_grid(contact ~ .) +
  geom_boxplot(aes(fill = y)) +
  theme(strip.text.y = element_text(hjust = 1, angle = 360)) +
  coord_flip()
b56 <- ggplot(bank, aes(y = pdays)) +
  facet_grid(month ~ .) +
  geom_boxplot(aes(fill = y)) +
  theme(strip.text.y = element_text(hjust = 1, angle = 360)) +
  coord_flip()
b66 <- ggplot(bank, aes(y = pdays)) +
  facet_grid(poutcome ~ .) +
  geom_boxplot(aes(fill = y)) +
  theme(strip.text.y = element_text(hjust = 1, angle = 360)) +
  coord_flip()

# Show the six plots one after another
b16
b26
b36
b46
b56
b66

“previous” across the categorical variables

# Box plots of previous, filled by the response y, with one facet row per level
# of each multinomial variable. Facet strip labels are drawn horizontally.
b17 <- ggplot(bank, aes(y = previous)) +
  facet_grid(job ~ .) +
  geom_boxplot(aes(fill = y)) +
  theme(strip.text.y = element_text(hjust = 1, angle = 360)) +
  coord_flip()
b27 <- ggplot(bank, aes(y = previous)) +
  facet_grid(marital ~ .) +
  geom_boxplot(aes(fill = y)) +
  theme(strip.text.y = element_text(hjust = 1, angle = 360)) +
  coord_flip()
b37 <- ggplot(bank, aes(y = previous)) +
  facet_grid(education ~ .) +
  geom_boxplot(aes(fill = y)) +
  theme(strip.text.y = element_text(hjust = 1, angle = 360)) +
  coord_flip()
b47 <- ggplot(bank, aes(y = previous)) +
  facet_grid(contact ~ .) +
  geom_boxplot(aes(fill = y)) +
  theme(strip.text.y = element_text(hjust = 1, angle = 360)) +
  coord_flip()
b57 <- ggplot(bank, aes(y = previous)) +
  facet_grid(month ~ .) +
  geom_boxplot(aes(fill = y)) +
  theme(strip.text.y = element_text(hjust = 1, angle = 360)) +
  coord_flip()
b67 <- ggplot(bank, aes(y = previous)) +
  facet_grid(poutcome ~ .) +
  geom_boxplot(aes(fill = y)) +
  theme(strip.text.y = element_text(hjust = 1, angle = 360)) +
  coord_flip()

# Show the six plots one after another
b17
b27
b37
b47
b57
b67